Collection of Tools & Utilities

home *** CD-ROM | disk | FTP | other *** search

/ Collection of Tools & Utilities / Collection of Tools and Utilities.iso / edit / pt20pc.zip / RE2.ASM < prev next >

Wrap

Assembly Source File | 1991-02-04 | 7KB | 292 lines

;-----------------------------------------------------------------------
; Regular-expression scanner core.
; Target: 8086 real mode, MASM syntax, far-call C interface.
;
; Register roles while the matcher is running:
;       al = input character
;       di = nlist tail
;       si = clist tail
;       dx = clist head
;
; Each clist/nlist entry is LIST_ENTRY (4) bytes:
;       word 0 = code address to resume at
;       word 1 = thisMatchBegin value that belongs to that thread
;
; All working storage lives in this segment and is addressed through
; CS/DS/ES, which are all pointed at RE_TEXT while matching.
;-----------------------------------------------------------------------

NEWLINE         equ     0AH             ; ASCII line feed
LIST_ENTRY      equ     4               ; bytes per clist/nlist entry
LIST_BYTES      equ     400             ; storage reserved for each list
RE_CODE_BYTES   equ     1000            ; storage reserved at _re_code

RE_FOUND        equ     1               ; return value: RE was found
RE_PARTIAL      equ     2               ; return value: partial match at end

; Argument offsets from bp (far call: saved bp, return IP, return CS)
ARG_BEGIN       equ     6               ; char *beginBuffer
ARG_END         equ     8               ; char *endBuffer
ARG_SEG         equ     10              ; char *segBuffer
ARG_MATCH_BEGIN equ     12              ; char **matchBegin
ARG_MATCH_END   equ     14              ; char **matchEnd
ARG_NEWLINES    equ     16              ; int *numberOfNewlines

RE_TEXT SEGMENT WORD PUBLIC 'CODE'
        assume  cs:RE_TEXT
        assume  ds:RE_TEXT
        assume  es:RE_TEXT
        extrn   _reFast1:BYTE
        extrn   _reFast2:BYTE
;
; XTAG:re_scan
; re_scan( char * beginBuffer, char * endBuffer, char *segBuffer,
;       char ** matchBegin, char ** matchEnd,
;       int * numberOfNewlines );
;
; returns (in ax):
;       0 = NOT_FOUND     -- RE was not found between beginBuffer
;                            and endBuffer
;       1 = FOUND         -- RE was found between matchBegin and matchEnd
;       2 = PARTIAL_MATCH -- partial match at the end of the buffer
;                            starting at matchBegin
;
; *numberOfNewlines receives the number of line feeds in the bytes from
; beginBuffer up to (not including) the final nextByteOffset.
; bp, ds, es, si and di are preserved; ax, bx, cx, dx and flags are not.
;
        public  _re_scan
_re_scan        PROC    FAR
        push    bp                      ; save bp
        mov     bp,sp                   ; set up bp to access the arguments
;
        mov     cs:dataSegment,ds       ; remember the caller's ds
;
        push    ds                      ; save ds and es
        push    es
        push    si                      ; save si and di
        push    di
;
        push    cs                      ; have all the segment registers point
        pop     ds                      ; to the beginning of the code segment
        push    cs
        pop     es
;
        mov     ax,[bp+ARG_BEGIN]       ; pick up and save the arguments
        mov     cs:nextByteOffset,ax
        mov     cs:firstByteOffset,ax
        mov     ax,[bp+ARG_END]
        mov     cs:lastByteOffset,ax
; store the segment next to the offset so the pair can be loaded with LDS/LES
        mov     ax,[bp+ARG_SEG]
        mov     cs:nextByteSegment,ax
;
init:
        cld                             ; set search direction to forward
        lea     di,list1                ; initially list1=nlist and list2=clist
        mov     nlistTop,di             ; but they are swapped after each char
        lea     si,list2
        mov     clistTop,si
        mov     dx,si                   ; dx = clist head (clist starts empty)
        mov     al,NEWLINE              ; "last char read" at beginning is a NL
        jmp     myxchg
;
; finish: common exit.  On entry ax = return value.
;
finish:
; count the number of lines in the scanned bytes
        push    ax                      ; save ax since we need to change al
        mov     di,cs:firstByteOffset   ; start the scan here
        mov     cx,cs:nextByteOffset    ; compute the number of bytes to scan
        mov     ds,cs:nextByteSegment
        mov     es,cs:nextByteSegment   ; scasb reads through es:di
        sub     cx,di                   ; cx = the number of bytes
        mov     al,NEWLINE
        xor     dx,dx                   ; newline counter, start at 0
NLLoop:
        ; REPNE with cx = 0 executes no SCASB and leaves the flags alone, so
        ; without this guard the ZF left by the XOR above would be counted
        ; as a newline when no bytes were scanned.
        jcxz    noMoreNLs
        repne scasb                     ; scan to the next newline
        jne     noMoreNLs               ; hit end of span w/o finding a newline
        inc     dx                      ; found another one, bump the counter
        jmp     NLLoop                  ; look for more
noMoreNLs:
        mov     ds,cs:dataSegment       ; caller's ds: the out pointer is ds-relative
        mov     bx,[bp+ARG_NEWLINES]    ; address of int to put newline count in
        mov     [bx],dx                 ; store the computed newline count
; end of newline counting
;
        pop     ax                      ; restore ax -- the return value
        pop     di
        pop     si
        pop     es
        pop     ds
        pop     bp                      ; restore bp
        ret
_re_scan        ENDP
;
; _re_cnode: reached by a near CALL; appends the caller's return address
; and thisMatchBegin to the clist, then resumes 3 bytes past that address.
;
        public  _re_cnode
_re_cnode:
        pop     bx                      ; get the return address
        mov     [si],bx                 ; save it in the clist
        mov     cx,thisMatchBegin
        mov     [si+2],cx
        add     si,LIST_ENTRY           ; move clist tail (si)
        add     bx,3                    ; generate bx+3 as an address
        push    bx                      ; push it so we can "JMP" to it
        ret                             ; with a RET.  We cannot use JMP 3[bx]
                                        ; directly since it will be indirect
;
; _re_nnode: reached by a near CALL; if the return address equals
; _addrJmpFound the RE has matched, otherwise the address and
; thisMatchBegin are appended to the nlist and control falls into _re_clist.
;
        public  _re_nnode
        extrn   _addrJmpFound:WORD
_re_nnode:
        pop     bx                      ; get the return address
        cmp     bx,_addrJmpFound        ; did we find it?
        je      _re_found
        mov     [di],bx                 ; save it in the nlist
        mov     cx,thisMatchBegin
        mov     [di+2],cx
        add     di,LIST_ENTRY           ; move nlist tail (di)
                                        ; then move to next clist item
;
; _re_clist: take the next entry off the clist and jump to its code
; address; when the clist is empty go read another character.
;
        public  _re_clist
_re_clist:
        cmp     dx,si
        je      myxchg                  ; head=tail => clist is empty
        mov     bx,dx                   ; move address into base register
        mov     cx,[bx+2]
        mov     thisMatchBegin,cx       ; restore this entry's match start
        add     dx,LIST_ENTRY           ; move to the next item
        jmp     WORD PTR [bx]           ; near indirect jump to the saved address
;
; _re_found: store the match bounds through matchBegin/matchEnd and
; return FOUND.
;
        public  _re_found
_re_found:
        mov     cx,thisMatchBegin
        dec     cx                      ; since it was taken from nextByteOffset
                                        ; and so is one too big
        mov     ds,cs:dataSegment
        mov     bx,[bp+ARG_MATCH_BEGIN]
        mov     [bx],cx                 ; store begin address of found RE
        mov     cx,cs:nextByteOffset
        dec     cx                      ; the RE ends here
        mov     bx,[bp+ARG_MATCH_END]
        mov     [bx],cx                 ; store end address of found RE
        mov     ax,RE_FOUND             ; FOUND flag is returned in ax
        jmp     finish
;
; myxchg: the clist is exhausted.  Optionally fast-scan ahead, swap the
; clist and nlist, then fetch the next input character.
;
myxchg:
        cmp     di,nlistTop
        jne     skipFastScan            ; nlist is not empty: matches pending
;
; if this char is 0 then do not try fast searches, if it is not 0 then the
; first character of the RE is a single fixed character
;
        cmp     _reFast1,0
        je      skipFastScan
;
; If there is no pending list of partial RE matches, then try to speed up
; the search by scanning for the first character of the RE.  The 8086 string
; search instructions are very fast for looking for a single character.
;
; get the parameters for the string scan
        push    ax                      ; save last char read (in al)
        mov     al,_reFast1
        les     di,DWORD PTR nextByteOffset
        mov     cx,lastByteOffset
        cmp     cx,di
        jbe     noScan                  ; at most one byte left: nothing to skip
        sub     cx,di
        inc     cx                      ; since lastByteOffset points to a valid byte
        mov     dx,cx                   ; save cx for the second scan
        repne scasb
        jne     notFound1               ; distinguish: not found - found at last char
        dec     di                      ; adjust since repne scasb goes one too far
notFound1:
        mov     cx,dx                   ; restore the count we saved
        mov     dx,di                   ; save the results of the first scasb in dx
        mov     al,_reFast2             ; get the second fast scan character
        or      al,al                   ; MOV sets no flags: test al explicitly
        jnz     doSecondScan            ; only look for chars not equal to '\0'
        mov     di,lastByteOffset       ; make sure this is not the lowest
        jmp     notFound2
doSecondScan:
        mov     di,WORD PTR nextByteOffset
        repne scasb
        jne     notFound2
        dec     di                      ; back up onto the matching byte
notFound2:
; use the one that came first (the lower one)
        cmp     di,dx
        jbe     useSecond
        mov     di,dx
useSecond:
        mov     nextByteOffset,di
noScan:
        push    cs
        pop     es                      ; restore es (NECESSARY since we use DI)
        mov     di,nlistTop             ; restore di
        pop     ax                      ; restore last char read (back into al)
;
;
skipFastScan:
        mov     si,di                   ; set new clist tail
        mov     di,clistTop             ; set new nlist tail
        mov     bx,nlistTop             ; set up to exchange clist and nlist
        mov     nlistTop,di             ; now do the reverse
        mov     clistTop,bx             ; reversing is faster than moving the lists
        mov     dx,bx                   ; start adding at the top of the clist
;
; get the next character
;
        lds     bx,DWORD PTR nextByteOffset     ; ds:bx -> next input byte
        cmp     bx,cs:lastByteOffset
        ja      endOfSpan               ; use unsigned comparison
        mov     BYTE PTR cs:_re_sidechars,al    ; save last char read
        mov     al,[bx+1]               ; get char after the next one
        mov     BYTE PTR cs:_re_sidechars+1,al  ; save next char to read
        mov     al,[bx]                 ; get the next character (finally)
        push    cs                      ; restore DS to equal CS
        pop     ds
        inc     bx
        cmp     bx,lastByteOffset       ; are we at the end of the buffer?
        ; NOTE(review): with lastByteOffset pointing at a valid byte, JB also
        ; substitutes a NL when bx == lastByteOffset, although that byte is
        ; still read on the next pass -- confirm whether JBE was intended.
        jb      notAtEnd
        mov     ah,NEWLINE              ; if so, simulate a NL as the next char
        mov     BYTE PTR _re_sidechars+1,ah
notAtEnd:
        mov     nextByteOffset,bx
        mov     thisMatchBegin,bx
        jmp     _re_code
;
; endOfSpan: input is exhausted (ds still holds the buffer segment here,
; hence the cs: overrides).  If threads are pending on the clist, report
; the lowest match start among them as a partial match.
;
endOfSpan:
;       mov     es,cs:nextByteSegment
        cmp     dx,si
        je      noClist                 ; clist empty => nothing pending
        mov     bx,dx
        mov     dx,cs:[bx+2]            ; dx = lowest match start seen so far
loop1:
        add     bx,LIST_ENTRY
        cmp     bx,si
        je      endLoop
        cmp     dx,cs:[bx+2]
        jbe     loop1
        mov     dx,cs:[bx+2]            ; found a lower one
        jmp     loop1
endLoop:
        mov     ds,cs:dataSegment       ; restore ds
        dec     dx                      ; taken from nextByteOffset and so 1 too high
        mov     bx,[bp+ARG_MATCH_BEGIN]
        mov     [bx],dx                 ; store begin address of the partial match
        mov     ax,RE_PARTIAL
        jmp     over1
noClist:
        xor     ax,ax                   ; NOT_FOUND
over1:
        jmp     finish
;
; _re_code: RE_CODE_BYTES of zero-filled storage that the scanner jumps
; into after each character is fetched.
;
        even
        public  _re_code
_re_code:
        db      RE_CODE_BYTES DUP (0)
;
        even
nlistTop        dw      0
clistTop        dw      0
firstByteOffset dw      0
; These next two words must stay in this order since they are picked up with
; an LDS instruction that wants them in this order in two consecutive words
nextByteOffset  dw      0
nextByteSegment dw      0
dataSegment     dw      0               ; caller's ds, saved on entry
;
; Here we will keep the last character read and the next character to be read
;
        public  _re_sidechars
_re_sidechars   dw      0
;
lastByteOffset  dw      0
thisMatchBegin  dw      0
;
        public  _re_wordtable
_re_wordtable   db      0, 0, 0, 0, 0, 0, -1, 3
        db      -2, -1, -1, 7, -2, -1, -1, 7
        db      0, 0, 0, 0, 0, 0, 0, 0
        db      0, 0, 0, 0, 0, 0, 0, 0
;
        even
list1:
        db      LIST_BYTES DUP (0)
;
        even
list2:
        db      LIST_BYTES DUP (0)
;
RE_TEXT ENDS
        end